---
title: |
  | Pre-analysis Plans: An Early Stocktaking
  | Codebook
author: |
  | George K. Ofosu and Daniel N. Posner^[Ofosu: Department of Government, London School of Economics and Political Science, Room CBG 3.04, Houghton Street, London WC2A 2AE, UK. Email: g.ofosu@lse.ac.uk. Posner: UCLA Department of Political Science, 4289 Bunche Hall, Los Angeles, CA 90095-1472. Email: dposner@polisci.ucla.edu.]
date: "`r format(Sys.Date(),'%B %d, %Y')`"
header-includes:
    - \usepackage{setspace}\onehalfspacing
output:
  pdf_document: default
  theme: journal
---

```{r setup, include=FALSE}
# Set the document-wide knitr default so chunk source is echoed unless a chunk
# overrides it. NOTE(review): every other chunk visible in this file sets
# echo=FALSE explicitly, so this default may have no visible effect - confirm.
knitr::opts_chunk$set(echo = TRUE)
```

 
\newpage
```{r results = 'hide', message=FALSE,echo=FALSE, eval=TRUE, warning=FALSE}
# R version 4.0.1 (2020-06-06)
#install packages
#install.packages(c("memisc", "tidyverse"))

#load required libraries
library(memisc) # memisc_0.99.22
library(tidyverse) # tidyverse_1.3.0


# set working directory
#setwd("")


#load dataset

# Read the master PAP coding sheet (path is relative to the working directory)
# and, in the same pipeline, turn the categorical columns into factors.
pap_dat <- read_csv('master_pap_data08202019.csv') %>%
  mutate(
    year = factor(year),
    registry = factor(registry),
    publication = factor(publication),
    `type of study` = factor(`type of study`)
  )

# Wrap the tibble in a memisc data.set so that per-variable metadata
# (descriptions, labels, measurement levels) can be attached for the codebook.
data <- data.set(pap_dat)
class(data)

# Re-apply the original column names so that they match the CSV exactly.
# Presumably data.set() alters non-syntactic names such as `type of study`
# - TODO confirm.
names(data) <- names(pap_dat)

```

```{r results = 'hide',message=FALSE, echo=FALSE, eval=TRUE, warning=FALSE}
# Annotate every variable of the data.set for the codebook:
#   1. attach a human-readable description to each variable;
#   2. mark the yes/no indicator variables as ordinal and give them value labels.
# Variable names are used exactly as they appear in `data`, including
# spellings such as `presentedpimarymainbody`, `chansecgedepvar` and
# `followoitliers` (presumably the dataset's own column names - do not
# "correct" them here without renaming the columns as well).
data <- within(data, {
  # --- Identification and registration -------------------------------------
  description(papcode_a) <- "PAP ID"
  description(year) <- "Year of registration"
  description(registry) <- "Registry"
  description(is.private) <- "PAP was privately registered"
  description(publication) <- "Publication status of PAP"
  description(`type of study`) <- "Type of Study"

  # --- Hypotheses ----------------------------------------------------------
  description(clearhyp) <- "PAP had clear hypotheses"
  description(morethanonehyp) <- "PAP specified more than one hypothesis"
  description(morethanonehyp_unclear) <- "Unclear whether PAP specified more than one hypothesis"
  description(howmanyhyp) <- "Number of hypotheses specified"
  description(howmanyhyp_unclear) <- "Number of hypotheses was unclear"
  description(primary) <- "PAP distinguished primary versus secondary hypothesis"
  description(howmanyprimary) <- "Number of primary hypotheses"
  description(howmanyprimary_unclear) <- "Number of primary hypotheses unclear"
  description(presentedpimarymainbody) <- "Number of primary hypotheses presented in main body published paper"
  description(presentedpimarymainbody_unclear) <- "Number of primary hypotheses presented in main body of paper unclear"
  description(presentedprimaryappendix) <- "Number of primary hypotheses presented in the appendix of paper"
  description(presentedprimaryappendix_rev) <- "Number of primary hypotheses presented in the appendix of paper (revised)"
  description(presentedprimaryappendix_unclear) <- "Number of primary hypotheses presented in appendix unclear"
  description(presentedprimarysupportedmainbody) <- "Number of primary hypotheses presented in the main body of article supported by analysis"
  description(presentedprimarysupportedmainbody_unclear) <- "Unclear the number of hypotheses presented in the main body supported by analysis"
  description(presentedprimarysupportedappendix) <- "Number of primary hypotheses presented in appendix supported by the analysis"
  description(presentedprimarysupportedappendix_rev) <- "Number of primary hypotheses presented in the appendix supported by the analysis (revised)"
  description(presentedprimarysupportedappendix_unclear) <- "Unclear: number of primary hypotheses presented in the appendix supported by the analysis"
  description(presentedsecondarymainbody) <- "Number of secondary hypotheses presented in the main body of the paper"
  description(presentedsecondaryappendix) <- "Number of secondary hypotheses presented in the appendix of paper"
  description(newhyp) <- "New hypotheses reported in published article"
  description(pointoutnew) <- "Author(s) acknowledge new hypotheses"

  # --- Dependent variables and controls ------------------------------------
  # NOTE(review): `changevar` is described as a change in the *independent*
  # variable, yet the two variables that follow concern the dependent variable
  # and a separate `changeindvar` exists below - confirm the intended wording.
  description(changevar) <- "There was change in the independent variable(s) reported in paper compared to that in PAPs"
  description(describechangedepvar) <- "Author described change in the dependent variable(s)"
  description(dec_var_change_description) <- "Description of changes in dependent variable"
  description(clearsecdepvar) <- "Secondary dependent variables are clearly specified"
  description(chansecgedepvar) <- "Change in secondary dependent variable in article"
  # NOTE(review): the next two names refer to the *secondary* dependent
  # variable but their descriptions do not say so - confirm.
  description(describechangesecdepvar) <- "Author(s) indicate changes to dependent variable in article"
  description(sec_dep_var_change_description) <- "Description of changes to dependent variable"
  description(ncontrols) <- "Number of control variables specified"
  description(ncontrols_unclear) <- "Unclear: number of controls specified"
  description(ncontrolsclear) <- "Control variables are clearly specified"

  # --- Sampling and design -------------------------------------------------
  description(popofinterest) <- "PAP specifies population of interest"
  description(samplingframe) <- "PAPs specifies the sampling frame"
  description(samplingstrategy) <- "PAP specifies sampling strategy"
  description(condexclude) <- "PAPs specifies how units/cases will be excluded from study sample"
  description(poweranalysis) <- "Power analysis is used to justify sample size"
  description(treatmentundercontrol) <- "Researcher controls treatment assignment"
  description(treatmentundercontrol_unclear) <- "Unclear whether treatment was under researcher control"
  description(randproc) <- "Specifies randomization procedure"
  description(manipcheck) <- "PAP specify a manipulation check (i.e. variables to report in a balance table)"
  description(imbalance) <- "PAP specify what to do in the event of an imbalance"

  # --- Rules for data problems and whether the paper followed them ---------
  description(rulesmissing) <- "PAP specify rules on how to handle missing values and attrition"
  description(rulesoutliers) <- "PAP specify rules for dealing with outliers"
  description(rulesnoncomp) <- "PAP specify rules for dealing with noncompliance"
  description(followmissing) <- "Paper follow the pre-specified protocols with respect to missing values and attrition"
  description(followoitliers) <- "Paper follow the pre-specified protocols with respect to dealing with outliers"
  description(follownoncomp) <- "Paper follow the pre-specified protocols with respect to noncompliance"

  # --- Statistical model ---------------------------------------------------
  description(statmodel) <- "PAP specify statistical model"
  description(anystatdevs) <- "Deviation from specified statistical model in article"
  description(statdevspointedout) <- "Author(s) points out deviation from pre-specified statistical model"
  description(statdevdescr) <- "Description of changes to the statistical model"
  description(stderrors) <- "PAP state how standard errors will be treated (robust, clustering, bootstrapping,or other techniques)"
  description(multipletest) <- "PAP specify a multiple testing adjustment"
  description(difinmeans) <- "PAP commit the researchers to presenting a simple difference-in-means test"
  description(onetailed) <- "Specify whether the test will be one-tailed or two-tailed"
  description(covars) <- "Specifies whether and how covariates will be included in analysis"

  # --- Registration process and supporting material ------------------------
  description(whenregistered) <- "Stage in the research process when PAPs was filed"
  description(evergated) <- "PAP was gated or private"
  description(updated) <- "PAPs was updated"
  description(howmanyupdates) <- "Number of updates"
  description(clearchanges) <- "Changes to PAPs were clearly marked"
  description(irb) <- "Specifies study has received IRB approval"
  description(sop) <- "Pre-specify what will guide their decisions when issues arise that were not anticipated in the PAP (i.e., the SOPs described in Lin and Green 2015)"
  description(pages) <- "Number of pages of PAP in single space"
  description(descriptionofchanges) <- "Description of changes to PAP"
  description(datalink) <- "Paper provided a link to data set"
  description(dofilelink) <- "PAPs included do file"
  description(instruments) <- "PAP included survey or research instrument"
  description(comment) <- "Additional comment on coding"
  description(coder) <- "Coder"

  # --- Independent variables and remaining clarity indicators --------------
  description(primary_secondary_hyp_maintained) <- "Distinction between primary and secondary hypotheses was maintained in article"
  description(clearindvar) <- "Primary independent/treatment is clearly specified"
  description(changeindvar) <- "Primary independent/treatment was changed in article"
  description(describechangeindvar) <- "Author(s) described changes in the independent variable"
  description(dec_indvar_change_description) <- "Description of changes in the independent variable"
  description(seconddepvarsclear) <- "Secondary dependent variable clearly specified"
  description(ncontrolclear) <- "Number of controls clearly specified"
  description(cleardepvar) <- "Primary dependent variable was specified sufficiently clearly"

  # Yes/no indicators: set the measurement level and the value labels in a
  # single pass. The variable list is written once so the two settings cannot
  # drift apart; for each variable the measurement level is still set before
  # the labels.
  foreach(
    x = c(
      is.private, clearhyp, morethanonehyp, howmanyhyp_unclear,
      presentedpimarymainbody_unclear, presentedprimaryappendix_unclear,
      newhyp, pointoutnew, changevar, describechangedepvar, clearsecdepvar,
      chansecgedepvar, ncontrols_unclear, popofinterest, samplingframe,
      samplingstrategy, condexclude, poweranalysis, treatmentundercontrol,
      treatmentundercontrol_unclear, randproc, manipcheck, imbalance,
      rulesmissing, rulesoutliers, rulesnoncomp, followmissing,
      followoitliers, follownoncomp, statmodel, anystatdevs,
      statdevspointedout, stderrors, multipletest, difinmeans, onetailed,
      covars, evergated, clearchanges, irb, sop, datalink, instruments,
      primary_secondary_hyp_maintained, clearindvar, changeindvar,
      seconddepvarsclear, ncontrolclear, cleardepvar, morethanonehyp_unclear
    ),
    {
      measurement(x) <- "ordinal"
      labels(x) <- c(
        "Yes" = 1,
        "No" = 0,
        "Missing" = NA
      )
    }
  )
})

```


# Description of variables
```{r message=FALSE,echo=FALSE, eval=TRUE,warning=FALSE}
# Produce the codebook for `data` with memisc::codebook(); the call is
# namespace-qualified, so it does not depend on which packages are attached.
memisc::codebook(data)
```



